{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "a2b862ab",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(34,\n",
       " ['accuracy',\n",
       "  'bertscore',\n",
       "  'bleu',\n",
       "  'bleurt',\n",
       "  'cer',\n",
       "  'chrf',\n",
       "  'code_eval',\n",
       "  'comet',\n",
       "  'competition_math',\n",
       "  'coval',\n",
       "  'cuad',\n",
       "  'f1',\n",
       "  'gleu',\n",
       "  'glue',\n",
       "  'google_bleu',\n",
       "  'indic_glue',\n",
       "  'matthews_correlation',\n",
       "  'mauve',\n",
       "  'meteor',\n",
       "  'pearsonr',\n",
       "  'precision',\n",
       "  'recall',\n",
       "  'rouge',\n",
       "  'sacrebleu',\n",
       "  'sari',\n",
       "  'seqeval',\n",
       "  'spearmanr',\n",
       "  'squad',\n",
       "  'squad_v2',\n",
       "  'super_glue',\n",
       "  'ter',\n",
       "  'wer',\n",
       "  'wiki_split',\n",
       "  'xnli'])"
      ]
     },
     "execution_count": 1,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from datasets import list_metrics\n",
    "\n",
    "#列出评价指标\n",
    "metrics_list = list_metrics()\n",
    "\n",
    "len(metrics_list), metrics_list"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "id": "d699a36e",
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Compute GLUE evaluation metric associated to each GLUE dataset.\n",
      "Args:\n",
      "    predictions: list of predictions to score.\n",
      "        Each translation should be tokenized into a list of tokens.\n",
      "    references: list of lists of references for each translation.\n",
      "        Each reference should be tokenized into a list of tokens.\n",
      "Returns: depending on the GLUE subset, one or several of:\n",
      "    \"accuracy\": Accuracy\n",
      "    \"f1\": F1 score\n",
      "    \"pearson\": Pearson Correlation\n",
      "    \"spearmanr\": Spearman Correlation\n",
      "    \"matthews_correlation\": Matthew Correlation\n",
      "Examples:\n",
      "\n",
      "    >>> glue_metric = datasets.load_metric('glue', 'sst2')  # 'sst2' or any of [\"mnli\", \"mnli_mismatched\", \"mnli_matched\", \"qnli\", \"rte\", \"wnli\", \"hans\"]\n",
      "    >>> references = [0, 1]\n",
      "    >>> predictions = [0, 1]\n",
      "    >>> results = glue_metric.compute(predictions=predictions, references=references)\n",
      "    >>> print(results)\n",
      "    {'accuracy': 1.0}\n",
      "\n",
      "    >>> glue_metric = datasets.load_metric('glue', 'mrpc')  # 'mrpc' or 'qqp'\n",
      "    >>> references = [0, 1]\n",
      "    >>> predictions = [0, 1]\n",
      "    >>> results = glue_metric.compute(predictions=predictions, references=references)\n",
      "    >>> print(results)\n",
      "    {'accuracy': 1.0, 'f1': 1.0}\n",
      "\n",
      "    >>> glue_metric = datasets.load_metric('glue', 'stsb')\n",
      "    >>> references = [0., 1., 2., 3., 4., 5.]\n",
      "    >>> predictions = [0., 1., 2., 3., 4., 5.]\n",
      "    >>> results = glue_metric.compute(predictions=predictions, references=references)\n",
      "    >>> print({\"pearson\": round(results[\"pearson\"], 2), \"spearmanr\": round(results[\"spearmanr\"], 2)})\n",
      "    {'pearson': 1.0, 'spearmanr': 1.0}\n",
      "\n",
      "    >>> glue_metric = datasets.load_metric('glue', 'cola')\n",
      "    >>> references = [0, 1]\n",
      "    >>> predictions = [0, 1]\n",
      "    >>> results = glue_metric.compute(predictions=predictions, references=references)\n",
      "    >>> print(results)\n",
      "    {'matthews_correlation': 1.0}\n",
      "\n"
     ]
    }
   ],
   "source": [
    "from datasets import load_metric\n",
    "\n",
    "#加载一个评价指标\n",
    "metric = load_metric('glue', 'mrpc')\n",
    "\n",
    "print(metric.inputs_description)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "5711e3ea",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'accuracy': 0.6666666666666666, 'f1': 0.6666666666666666}"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#计算一个评价指标\n",
    "predictions = [0, 1, 0]\n",
    "references = [0, 1, 1]\n",
    "\n",
    "final_score = metric.compute(predictions=predictions, references=references)\n",
    "\n",
    "final_score"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.13"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
